{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
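    "# Extract Sentiment\n",
    "\n",
    "This notebook builds a small sample of open-ended survey comments and runs `extract_sentiment` on the `comments` column. The result keeps the original columns and adds `positive`, `neutral` and `negative` scores plus an overall `sentiment` label for each comment."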
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "y:\\Python Scripts\\pandas-survey-toolkit\\.venv\\Lib\\site-packages\\sentence_transformers\\cross_encoder\\CrossEncoder.py:11: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from tqdm.autonotebook import tqdm, trange\n",
      "y:\\Python Scripts\\pandas-survey-toolkit\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:311: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
      "  torch.utils._pytree._register_pytree_node(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original data:\n"
     ]
    },
    {
     "data": {
      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
       "columns": [
        {
         "name": "index",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "respondent_id",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "comments",
         "rawType": "object",
         "type": "string"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "f35dee40-98d3-453d-824b-89cd6a0d4fac",
       "rows": [
        [
         "0",
         "1",
         "I really enjoyed using this product. It exceeded my expectations."
        ],
        [
         "1",
         "2",
         "The customer service was terrible and the product broke after a week."
        ],
        [
         "2",
         "3",
         "It was okay, nothing special but did the job I needed it to do."
        ],
        [
         "3",
         "4",
         "Absolutely love this! Best purchase I've made all year."
        ],
        [
         "4",
         "5",
         "I'm disappointed with the quality compared to what was advertised."
        ],
        [
         "5",
         "6",
         "It's fine I guess, but I probably wouldn't buy it again."
        ],
        [
         "6",
         "7",
         "Fantastic product and great value for money."
        ],
        [
         "7",
         "8",
         "This is rubbish. Complete waste of money and time."
        ],
        [
         "8",
         "9",
         "I'm neither happy nor unhappy with this purchase."
        ],
        [
         "9",
         "10",
         "Well designed and does exactly what it says on the tin!"
        ]
       ],
       "shape": {
        "columns": 2,
        "rows": 10
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>respondent_id</th>\n",
       "      <th>comments</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>I really enjoyed using this product. It exceed...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>The customer service was terrible and the prod...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>It was okay, nothing special but did the job I...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Absolutely love this! Best purchase I've made ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>I'm disappointed with the quality compared to ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>It's fine I guess, but I probably wouldn't buy...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>Fantastic product and great value for money.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>This is rubbish. Complete waste of money and t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>I'm neither happy nor unhappy with this purchase.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>Well designed and does exactly what it says on...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   respondent_id                                           comments\n",
       "0              1  I really enjoyed using this product. It exceed...\n",
       "1              2  The customer service was terrible and the prod...\n",
       "2              3  It was okay, nothing special but did the job I...\n",
       "3              4  Absolutely love this! Best purchase I've made ...\n",
       "4              5  I'm disappointed with the quality compared to ...\n",
       "5              6  It's fine I guess, but I probably wouldn't buy...\n",
       "6              7       Fantastic product and great value for money.\n",
       "7              8  This is rubbish. Complete waste of money and t...\n",
       "8              9  I'm neither happy nor unhappy with this purchase.\n",
       "9             10  Well designed and does exactly what it says on..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# NOTE(review): nlp is never referenced by name in this notebook; presumably this\n",
    "# import is what makes DataFrame.extract_sentiment available -- confirm before removing.\n",
    "from pandas_survey_toolkit import nlp\n",
    "\n",
    "# Ten open-ended survey comments, one per respondent\n",
    "comments = [\n",
    "    \"I really enjoyed using this product. It exceeded my expectations.\",\n",
    "    \"The customer service was terrible and the product broke after a week.\",\n",
    "    \"It was okay, nothing special but did the job I needed it to do.\",\n",
    "    \"Absolutely love this! Best purchase I've made all year.\",\n",
    "    \"I'm disappointed with the quality compared to what was advertised.\",\n",
    "    \"It's fine I guess, but I probably wouldn't buy it again.\",\n",
    "    \"Fantastic product and great value for money.\",\n",
    "    \"This is rubbish. Complete waste of money and time.\",\n",
    "    \"I'm neither happy nor unhappy with this purchase.\",\n",
    "    \"Well designed and does exactly what it says on the tin!\"\n",
    "]\n",
    "\n",
    "# Column mapping: respondent ids 1..10 alongside their comment text\n",
    "data = {'respondent_id': range(1, 11), 'comments': comments}\n",
    "df = pd.DataFrame(data)\n",
    "\n",
    "# Show the input frame before any sentiment columns are added\n",
    "print(\"Original data:\")\n",
    "display(df)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "y:\\Python Scripts\\pandas-survey-toolkit\\.venv\\Lib\\site-packages\\transformers\\modeling_utils.py:484: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  return torch.load(checkpoint_file, map_location=map_location)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Data with sentiment analysis:\n"
     ]
    },
    {
     "data": {
      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
       "columns": [
        {
         "name": "index",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "respondent_id",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "comments",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "positive",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "neutral",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "negative",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "sentiment",
         "rawType": "object",
         "type": "string"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "dcc84147-05c9-4114-8e90-d1cbcee94337",
       "rows": [
        [
         "0",
         "1",
         "I really enjoyed using this product. It exceeded my expectations.",
         "0.9882898330688477",
         "0.009408515878021717",
         "0.00230163661763072",
         "positive"
        ],
        [
         "1",
         "2",
         "The customer service was terrible and the product broke after a week.",
         "0.0020007879938930273",
         "0.017636312171816826",
         "0.9803629517555237",
         "negative"
        ],
        [
         "2",
         "3",
         "It was okay, nothing special but did the job I needed it to do.",
         "0.8576692938804626",
         "0.13254237174987793",
         "0.009788259863853455",
         "positive"
        ],
        [
         "3",
         "4",
         "Absolutely love this! Best purchase I've made all year.",
         "0.9912731051445007",
         "0.007251274306327105",
         "0.0014755873708054423",
         "positive"
        ],
        [
         "4",
         "5",
         "I'm disappointed with the quality compared to what was advertised.",
         "0.002319690538570285",
         "0.030413493514060974",
         "0.9672667980194092",
         "negative"
        ],
        [
         "5",
         "6",
         "It's fine I guess, but I probably wouldn't buy it again.",
         "0.05289428308606148",
         "0.44394204020500183",
         "0.5031636953353882",
         "negative"
        ],
        [
         "6",
         "7",
         "Fantastic product and great value for money.",
         "0.9762622117996216",
         "0.02090422250330448",
         "0.002833594800904393",
         "positive"
        ],
        [
         "7",
         "8",
         "This is rubbish. Complete waste of money and time.",
         "0.004193859174847603",
         "0.02323022112250328",
         "0.9725759029388428",
         "negative"
        ],
        [
         "8",
         "9",
         "I'm neither happy nor unhappy with this purchase.",
         "0.07574984431266785",
         "0.32749032974243164",
         "0.5967597961425781",
         "negative"
        ],
        [
         "9",
         "10",
         "Well designed and does exactly what it says on the tin!",
         "0.7589830756187439",
         "0.21950559318065643",
         "0.02151135914027691",
         "positive"
        ]
       ],
       "shape": {
        "columns": 6,
        "rows": 10
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>respondent_id</th>\n",
       "      <th>comments</th>\n",
       "      <th>positive</th>\n",
       "      <th>neutral</th>\n",
       "      <th>negative</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>I really enjoyed using this product. It exceed...</td>\n",
       "      <td>0.988290</td>\n",
       "      <td>0.009409</td>\n",
       "      <td>0.002302</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>The customer service was terrible and the prod...</td>\n",
       "      <td>0.002001</td>\n",
       "      <td>0.017636</td>\n",
       "      <td>0.980363</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>It was okay, nothing special but did the job I...</td>\n",
       "      <td>0.857669</td>\n",
       "      <td>0.132542</td>\n",
       "      <td>0.009788</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Absolutely love this! Best purchase I've made ...</td>\n",
       "      <td>0.991273</td>\n",
       "      <td>0.007251</td>\n",
       "      <td>0.001476</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>I'm disappointed with the quality compared to ...</td>\n",
       "      <td>0.002320</td>\n",
       "      <td>0.030413</td>\n",
       "      <td>0.967267</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>It's fine I guess, but I probably wouldn't buy...</td>\n",
       "      <td>0.052894</td>\n",
       "      <td>0.443942</td>\n",
       "      <td>0.503164</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>Fantastic product and great value for money.</td>\n",
       "      <td>0.976262</td>\n",
       "      <td>0.020904</td>\n",
       "      <td>0.002834</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>This is rubbish. Complete waste of money and t...</td>\n",
       "      <td>0.004194</td>\n",
       "      <td>0.023230</td>\n",
       "      <td>0.972576</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>I'm neither happy nor unhappy with this purchase.</td>\n",
       "      <td>0.075750</td>\n",
       "      <td>0.327490</td>\n",
       "      <td>0.596760</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>Well designed and does exactly what it says on...</td>\n",
       "      <td>0.758983</td>\n",
       "      <td>0.219506</td>\n",
       "      <td>0.021511</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   respondent_id                                           comments  positive  \\\n",
       "0              1  I really enjoyed using this product. It exceed...  0.988290   \n",
       "1              2  The customer service was terrible and the prod...  0.002001   \n",
       "2              3  It was okay, nothing special but did the job I...  0.857669   \n",
       "3              4  Absolutely love this! Best purchase I've made ...  0.991273   \n",
       "4              5  I'm disappointed with the quality compared to ...  0.002320   \n",
       "5              6  It's fine I guess, but I probably wouldn't buy...  0.052894   \n",
       "6              7       Fantastic product and great value for money.  0.976262   \n",
       "7              8  This is rubbish. Complete waste of money and t...  0.004194   \n",
       "8              9  I'm neither happy nor unhappy with this purchase.  0.075750   \n",
       "9             10  Well designed and does exactly what it says on...  0.758983   \n",
       "\n",
       "    neutral  negative sentiment  \n",
       "0  0.009409  0.002302  positive  \n",
       "1  0.017636  0.980363  negative  \n",
       "2  0.132542  0.009788  positive  \n",
       "3  0.007251  0.001476  positive  \n",
       "4  0.030413  0.967267  negative  \n",
       "5  0.443942  0.503164  negative  \n",
       "6  0.020904  0.002834  positive  \n",
       "7  0.023230  0.972576  negative  \n",
       "8  0.327490  0.596760  negative  \n",
       "9  0.219506  0.021511  positive  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Sentiment distribution:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "sentiment\n",
       "positive    5\n",
       "negative    5\n",
       "Name: count, dtype: int64"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Most positive comments:\n"
     ]
    },
    {
     "data": {
      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
       "columns": [
        {
         "name": "index",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "comments",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "positive",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "neutral",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "negative",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "sentiment",
         "rawType": "object",
         "type": "string"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "ba2109dd-776b-4a9c-bb15-dbe6b1b8a357",
       "rows": [
        [
         "3",
         "Absolutely love this! Best purchase I've made all year.",
         "0.9912731051445007",
         "0.007251274306327105",
         "0.0014755873708054423",
         "positive"
        ],
        [
         "0",
         "I really enjoyed using this product. It exceeded my expectations.",
         "0.9882898330688477",
         "0.009408515878021717",
         "0.00230163661763072",
         "positive"
        ],
        [
         "6",
         "Fantastic product and great value for money.",
         "0.9762622117996216",
         "0.02090422250330448",
         "0.002833594800904393",
         "positive"
        ]
       ],
       "shape": {
        "columns": 5,
        "rows": 3
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>comments</th>\n",
       "      <th>positive</th>\n",
       "      <th>neutral</th>\n",
       "      <th>negative</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Absolutely love this! Best purchase I've made ...</td>\n",
       "      <td>0.991273</td>\n",
       "      <td>0.007251</td>\n",
       "      <td>0.001476</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>I really enjoyed using this product. It exceed...</td>\n",
       "      <td>0.988290</td>\n",
       "      <td>0.009409</td>\n",
       "      <td>0.002302</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Fantastic product and great value for money.</td>\n",
       "      <td>0.976262</td>\n",
       "      <td>0.020904</td>\n",
       "      <td>0.002834</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            comments  positive   neutral  \\\n",
       "3  Absolutely love this! Best purchase I've made ...  0.991273  0.007251   \n",
       "0  I really enjoyed using this product. It exceed...  0.988290  0.009409   \n",
       "6       Fantastic product and great value for money.  0.976262  0.020904   \n",
       "\n",
       "   negative sentiment  \n",
       "3  0.001476  positive  \n",
       "0  0.002302  positive  \n",
       "6  0.002834  positive  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Most negative comments:\n"
     ]
    },
    {
     "data": {
      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
       "columns": [
        {
         "name": "index",
         "rawType": "int64",
         "type": "integer"
        },
        {
         "name": "comments",
         "rawType": "object",
         "type": "string"
        },
        {
         "name": "positive",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "neutral",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "negative",
         "rawType": "float64",
         "type": "float"
        },
        {
         "name": "sentiment",
         "rawType": "object",
         "type": "string"
        }
       ],
       "conversionMethod": "pd.DataFrame",
       "ref": "576a4da9-174b-4a24-9ef1-5a7050d84b42",
       "rows": [
        [
         "1",
         "The customer service was terrible and the product broke after a week.",
         "0.0020007879938930273",
         "0.017636312171816826",
         "0.9803629517555237",
         "negative"
        ],
        [
         "7",
         "This is rubbish. Complete waste of money and time.",
         "0.004193859174847603",
         "0.02323022112250328",
         "0.9725759029388428",
         "negative"
        ],
        [
         "4",
         "I'm disappointed with the quality compared to what was advertised.",
         "0.002319690538570285",
         "0.030413493514060974",
         "0.9672667980194092",
         "negative"
        ]
       ],
       "shape": {
        "columns": 5,
        "rows": 3
       }
      },
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>comments</th>\n",
       "      <th>positive</th>\n",
       "      <th>neutral</th>\n",
       "      <th>negative</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>The customer service was terrible and the prod...</td>\n",
       "      <td>0.002001</td>\n",
       "      <td>0.017636</td>\n",
       "      <td>0.980363</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>This is rubbish. Complete waste of money and t...</td>\n",
       "      <td>0.004194</td>\n",
       "      <td>0.023230</td>\n",
       "      <td>0.972576</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>I'm disappointed with the quality compared to ...</td>\n",
       "      <td>0.002320</td>\n",
       "      <td>0.030413</td>\n",
       "      <td>0.967267</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            comments  positive   neutral  \\\n",
       "1  The customer service was terrible and the prod...  0.002001  0.017636   \n",
       "7  This is rubbish. Complete waste of money and t...  0.004194  0.023230   \n",
       "4  I'm disappointed with the quality compared to ...  0.002320  0.030413   \n",
       "\n",
       "   negative sentiment  \n",
       "1  0.980363  negative  \n",
       "7  0.972576  negative  \n",
       "4  0.967267  negative  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "# Run sentiment extraction over the free-text 'comments' column\n",
    "df_with_sentiment = df.extract_sentiment(input_column='comments')\n",
    "\n",
    "# Full result frame\n",
    "print(\"\\nData with sentiment analysis:\")\n",
    "display(df_with_sentiment)\n",
    "\n",
    "# Number of comments under each sentiment label\n",
    "print(\"\\nSentiment distribution:\")\n",
    "sentiment_counts = df_with_sentiment['sentiment'].value_counts()\n",
    "display(sentiment_counts)\n",
    "\n",
    "# Top three rows by positive score, then top three by negative score;\n",
    "# both views show the same subset of columns.\n",
    "summary_columns = ['comments', 'positive', 'neutral', 'negative', 'sentiment']\n",
    "for heading, score_column in (\n",
    "    (\"\\nMost positive comments:\", 'positive'),\n",
    "    (\"\\nMost negative comments:\", 'negative'),\n",
    "):\n",
    "    print(heading)\n",
    "    top_three = df_with_sentiment.sort_values(score_column, ascending=False).head(3)\n",
    "    display(top_three[summary_columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}